home *** CD-ROM | disk | FTP | other *** search
- /*
- * Bawk C actions compiler
- */
- #include <stdio.h>
-
- #include "bawk.h"
-
- act_compile( actbuf )
- char *actbuf; /* where tokenized actions are compiled into */
- {
- Where = ACTION;
- return stmt_compile( actbuf );
- }
-
- pat_compile( actbuf )
- char *actbuf; /* where tokenized actions are compiled into */
- {
- Where = PATTERN;
- return stmt_compile( actbuf );
- }
-
- stmt_compile( actbuf )
- char *actbuf; /* where tokenized actions are compiled into */
- {
- /*
- * Read and tokenize C actions from current input file into the
- * action buffer. Strip out comments and whitespace in the
- * process.
- */
- void storeptr();
- char *actptr, /* actbuf pointer */
- *cp, /* work pointer */
- buf[MAXLINELEN]; /* string buffer */
- int braces, /* counts '{}' pairs - return when 0 */
- parens, /* counts '()' pairs */
- i, /* temp */
- c; /* current input character */
-
- braces = parens = 0;
- actptr = actbuf;
- while ( (c = getcharacter()) != -1 )
- {
- /*
- * Skip over spaces, tabs and newlines
- */
- if ( c==' ' || c=='\t' || c=='\n' ) continue;
- if ( c=='#' ) {
- /*
- * Skip comments. Comments start with a '#' and
- * end at the next newline.
- */
- while ( (c = getcharacter()) != -1 && c!='\n' )
- ;
- continue;
- }
-
- if ( c=='{' ) {
- if ( Where==PATTERN ) {
- /*
- * We're compiling a pattern. The '{' marks
- * the beginning of an action statement.
- * Push the character back and return.
- */
- ungetcharacter( '{' );
- break;
- }
- else {
- /*
- * We must be compiling an action statement.
- * '{'s mark beginning of action or compound
- * statements.
- */
- ++braces;
- *actptr++ = T_LBRACE;
- }
- }
- else if ( c=='}' ) {
- *actptr++ = T_RBRACE;
- if ( ! --braces )
- /*
- * Found the end of the action string
- */
- break;
- }
- else if ( c=='(' ) {
- ++parens;
- *actptr++ = T_LPAREN;
- }
- else if ( c==')' ) {
- if ( --parens < 0 ) error( "mismatched '()'", ACT_ERROR );
- *actptr++ = T_RPAREN;
- }
- else if ( c==',' && !braces && !parens && Where==PATTERN ) {
- /*
- * found a comma outside of any braces or parens-
- * this must be a regular expression seperator.
- */
- ungetcharacter( ',' );
- break;
- }
- /*
- * Check if it's a regular expression:
- */
- else if ( c=='/' ) {
- /*
- * A '/' inside a pattern string starts a regular expression. Inside
- * action strings, a '/' is the division operator.
- */
- if ( Where == PATTERN ) goto dopattern;
- else *actptr++ = T_DIV;
- }
- else if ( c=='@' ) {
- dopattern:
- /*
- * Within action strings, only the '@' may be used to delimit regular
- * expressions
- */
- *actptr++ = T_REGEXP;
- ungetcharacter( c );
- actptr += re_compile( actptr );
- }
- /*
- * symbol, string or constant:
- */
- else if ( alpha( c ) ) {
- /*
- * It's a symbol reference. Copy the symbol into string buffer.
- */
- cp = buf;
- do
- *cp++ = c;
- while ( (c=getcharacter()) != -1 && alphanum( c ) );
- ungetcharacter( c );
- *cp = 0;
- /*
- * Check if a keyword, builtin function or variable.
- */
- if (( c = iskeyword( buf ) ) != 0) *actptr++ = c;
- else if (( i = isfunction( buf ) ) != 0) {
- *actptr++ = T_FUNCTION;
- storeint( actptr, i );
- actptr += sizeof( i );
- }
- else {
- /*
- * It's a symbol name.
- */
- *actptr++ = T_VARIABLE;
- if ( (cp = (char *) findvar( buf )) == 0 )
- cp = (char *) addvar( buf );
- storeptr( actptr, cp );
- actptr += sizeof( cp );
- }
- }
- else if ( c == '"' ) {
- /*
- * It's a string constant
- */
- *actptr++ = T_STRING;
- actptr = str_compile( actptr, '"' );
- }
- else if ( c == '\'' ) {
- /*
- * It's a character constant
- */
- *actptr++ = T_CONSTANT;
- str_compile( buf, '\'' );
- storeint( actptr, *buf );
- actptr += sizeof( i );
- }
- else if ( num( c ) ) {
- /*
- * It's a numeric constant
- */
- *actptr++ = T_CONSTANT;
- cp = buf;
- do
- *cp++ = c;
- while ( (c=getcharacter()) != -1 && num(c) );
- ungetcharacter( c );
- *cp = 0;
- storeint( actptr, atoi( buf ) );
- actptr += sizeof( i );
- }
- /*
- * unary operator:
- */
- else if ( c == '$' ) *actptr++ = T_DOLLAR;
- /*
- * or binary operator:
- */
- else if ( c == '=' ) {
- if ( (c=getcharacter()) == '=' ) *actptr++ = T_EQ;
- else {
- ungetcharacter( c );
- *actptr++ = T_ASSIGN;
- }
- }
- else if ( c == '!' ) {
- if ( (c=getcharacter()) == '=' ) *actptr++ = T_NE;
- else {
- ungetcharacter( c );
- *actptr++ = T_LNOT;
- }
- }
-
- else if ( c == '<' ) {
- if ( (c=getcharacter()) == '<' ) *actptr++ = T_SHL;
- else if ( c == '=' ) *actptr++ = T_LE;
- else {
- ungetcharacter( c );
- *actptr++ = T_LT;
- }
- }
-
- else if ( c == '>' ) {
- if ( (c=getcharacter()) == '>' ) *actptr++ = T_SHR;
- else if ( c == '=' ) *actptr++ = T_GE;
- else {
- ungetcharacter( c );
- *actptr++ = T_GT;
- }
- }
-
- else if ( c == '&' ) {
- if ( (c=getcharacter()) == '&' ) *actptr++ = T_LAND;
- else {
- ungetcharacter( c );
- *actptr++ = T_AND;
- }
- }
-
- else if ( c == '|' ) {
- if ( (c=getcharacter()) == '|' ) *actptr++ = T_LIOR;
- else {
- ungetcharacter( c );
- *actptr++ = T_IOR;
- }
- }
- else if ( c == '+' ) {
- if ( (c=getcharacter()) == '+' ) *actptr++ = T_INCR;
- else {
- ungetcharacter( c );
- *actptr++ = T_ADD;
- }
- }
-
- else if ( c == '-' ) {
- if ( (c=getcharacter()) == '-' ) *actptr++ = T_DECR;
- else {
- ungetcharacter( c );
- *actptr++ = T_SUB;
- }
- }
-
- /*
- * punctuation
- */
- else if ( instr( c, "[](),;*/%+-^~" ) ) *actptr++ = c;
-
- else {
- /*
- * Bad character in input line
- */
- error( "lexical error", ACT_ERROR );
- }
-
- if ( actptr >= Workbuf + MAXWORKBUFLEN )
- error( "action too long", MEM_ERROR );
- }
- if ( braces || parens )
- error( "mismatched '{}' or '()'", ACT_ERROR );
-
- *actptr++ = T_EOF;
-
- return actptr - actbuf;
- }
-
- char *
- str_compile( str, delim )
- char *str, delim;
- {
- /*
- * Compile a string from current input file into the given string buffer.
- * Stop when input character is the delimiter in "delim". Returns a
- * pointer to the first character after the string.
- */
- int c;
- char buf[ MAXLINELEN ];
-
- while ( (c = getcharacter()) != -1 && c != delim) {
- if ( c == '\\' ) {
- switch ( c = getcharacter() ) {
- case -1:
- goto err;
- case 'b':
- c = '\b';
- break;
- case 'n':
- c = '\n';
- break;
- case 't':
- c = '\t';
- break;
- case 'f':
- c = '\f';
- break;
- case 'r':
- c = '\r';
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- *buf = c;
- for ( c=1; c<3; ++c ) {
- if ( (buf[c]=getcharacter()) == -1 ) goto err;
- }
- buf[c] = 0;
- sscanf( buf, "%o", &c );
- break;
- case '\n':
- if ( getcharacter() == -1 ) goto err;
- default:
- if ( (c = getcharacter()) == -1 ) goto err;
- }
- }
- *str++ = c;
- }
- *str++ = 0;
-
- return str;
- err:
- sprintf( buf, "missing %c delimiter", delim );
- error( buf, 4 );
- }
-
- storeint( ip, i )
- int *ip, i;
- {
- return *ip = i;
- }
- void
- storeptr( pp, p )
- char **pp, *p;
- {
- *pp = p;
- }
-
- fetchint( ip )
- int *ip;
- {
- return *ip;
- }
-
- char *
- fetchptr( pp )
- char **pp;
- {
- return *pp;
- }
-
- getoken() {
- char *cp;
- int i;
-
- switch ( Token = *Actptr++ ) {
- case T_STRING:
- case T_REGEXP:
- Value.dptr = Actptr;
- Actptr += strlen( Actptr ) + 1;
- break;
- case T_VARIABLE:
- Value.dptr = fetchptr( Actptr );
- Actptr += sizeof( cp );
- break;
- case T_FUNCTION:
- case T_CONSTANT:
- Value.ival = fetchint( Actptr );
- Actptr += sizeof( i );
- break;
- case T_EOF:
- --Actptr;
- default:
- Value.dptr = 0;
- }
-
- #ifdef DEBUG
- if ( Debug > 1 )
- printf( "Token='%c' (0x%x), Value=%d\n",
- Token,Token,Value.ival );
- #endif
-
- return Token;
- }